# "Efficiency and water use: Dynamic effects of irrigation technology adoption"
# by Micah Cameron-Harp and Nathan Hendricks
# 
# Code written by Micah Cameron-Harp
# May 16th, 2024
# 
# This r script estimates the effects of the two irrigation technology transitions
#   on the percent of water right groups' acreage allocated to five crops using
#   the Callaway and Sant'Anna (2020) estimator. The results are displayed
#   in Table C.5 within the appendix. 
pacman::p_load(
  did,
  tidyverse,
  openxlsx,
  dplyr)

# Parse arguments submitted by STATA
args = commandArgs(trailingOnly = "TRUE")
arg1 <- args[1]

#Define directories and set working directory
# NOTE - The root directory will be passed here from the main_text_results.do file 
dr_root <- paste0(arg1)
dr_data <- paste0(dr_root, "/data")
dr_output = paste0(dr_root, "/outputs")
dr_output_main = paste0(dr_root, "/outputs/main_text")
dr_output_app = paste0(dr_root, "/outputs/appendices")
dr_output_log = paste0(dr_root, "/outputs/logs")
dr_temp = paste0(dr_root, "/data/intermediate")
setwd(dr_temp)

########################################
#---------- Flood to Center Pivot or LEPA -----#
wrg_flood_cplepa <- read.csv(file = file.path(dr_temp, "flood_cporlepa_prepped.csv"))

#Filter the data to only include the years before 2016
wrg_flood_cplepa <- wrg_flood_cplepa %>% dplyr::filter(wua_year <= 2015) %>%
  dplyr::mutate(corn_or_soy_acres = corn_acres + soy_acres,
                perc_soy = soy_acres/acres_irr, 
                perc_corn = corn_acres/acres_irr,
                perc_wheat = wheat_acres/acres_irr,
                perc_sorghum = sorghum_acres/acres_irr,
                perc_alfalfa = alfalfa_acres/acres_irr)

#Create a new dataframe to store the results in 
  df_template <- data.frame(matrix(ncol=6,nrow=0, 
                                            dimnames=list(NULL, c("dep_var", "effect_estimated",
                                                                  "estimate", "se_estimate", 
                                                                	"c_crit_val", "panel_mean_dep_var"))))
  #Make one version for the ATT's
  att_df <- df_template
  #Make another for the cohort effects
  grp_df <- df_template
  #Next version for the event study coefficients
  dyn_df <- df_template
  
  #List the dependent variables
  effect_order <- c("perc_soy", "perc_corn", "perc_wheat", "perc_sorghum", "perc_alfalfa")
  
  for (i in effect_order) {
    dep_var <- i
      #Make a df of just the dep var so we can get its mean value
      df_dep_var <- wrg_flood_cplepa %>% dplyr::select(all_of(dep_var))
      mean_dep_var <- mean(df_dep_var[,1], na.rm = TRUE)
    #Run att_gt Callaway and Sant'Anna using Not Yet Treated as Control Group
    sharp_floodcplepa_attgt_nyt <- att_gt(yname = dep_var,
                                      tname = "wua_year",
                                      idname = "WR_GROUP",
                                      gname = "cohort_flood_cplepa",
                                      data = wrg_flood_cplepa,
                                      panel = TRUE,
                                      allow_unbalanced_panel = TRUE,
                                      xformla=~hyd_cond+predev_sat+awc+sandtotal+silttotal+init_af_used,
                                      est_method = "dr",
                                      control_group = c("notyettreated"),
                                      anticipation=0,
                                      clustervars = "WR_GROUP", 
                                      biters=1000
    )
    #Group specific effects to get overall ATT
    agg.gs <- aggte(sharp_floodcplepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = 0, max_e = Inf)
      #Store ATT  
      att_df[nrow(att_df)+1,] <- c(dep_var, "group_agg_effect_average", agg.gs["overall.att"], agg.gs["overall.se"], agg.gs["crit.val.egt"], mean_dep_var) 
      #Store cohort effects
      agg.gs <- aggte(sharp_floodcplepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = -Inf, max_e = Inf)
      group_order <- c(agg.gs[["egt"]])
      g_est_lst <- c(agg.gs[["att.egt"]])
      g_se_lst <- c(agg.gs[["se.egt"]])
      g_c <- agg.gs$crit.val.egt
      for (i in 1:length(group_order)) {
        grp_df[nrow(grp_df)+1,] <- c(dep_var, group_order[i], g_est_lst[i], g_se_lst[i], g_c, mean_dep_var)
      }

    #Dynamic effects
    agg.es <- aggte(sharp_floodcplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = 0, max_e = Inf)
      #Store the att  
      att_df[nrow(att_df)+1,] <- c(dep_var, "event_agg_effect_average", agg.es["overall.att"], agg.es["overall.se"], agg.es["crit.val.egt"], mean_dep_var) 
      #Store the event study coefficients
      agg.es <- aggte(sharp_floodcplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = -Inf, max_e = Inf)
      effect_order <- c(agg.es[["egt"]])
      est_lst <- c(agg.es[["att.egt"]])
      se_lst <- c(agg.es[["se.egt"]])
      e_c <- agg.es$crit.val.egt
      for (i in 1:length(effect_order)) {
        dyn_df[nrow(dyn_df)+1,] <- c(dep_var, effect_order[i], est_lst[i], se_lst[i], e_c, mean_dep_var)
      }
  }
  #Create the 95% ci variables
  att_df <- att_df %>%  mutate(ci_95_lb = estimate - c_crit_val*se_estimate,
                               ci_95_ub = estimate + c_crit_val*se_estimate)
  dyn_df <- dyn_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                               ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
  grp_df <- grp_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                               ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
  #Save as excel workbook
  dataset_names <- list('cs_att' = att_df, 'cs_event' = dyn_df, 'cs_group' = grp_df)
  write.xlsx(dataset_names, file.path(dr_temp, "/cs_floodtocplepa_perccrops.xlsx"))
 
  #Remove excess objects
  rm(wrg_flood_cplepa, att_df, grp_df, dyn_df, sharp_floodcplepa_attgt_nyt)
  gc()
  
  #----------------------------------------------# 
  #---------- Center Pivot to LEPA --------------#
  #----------------------------------------------# 
  wrg_cp_lepa <- read.csv(file = file.path(dr_temp, "cp_lepa_prepped.csv"))
  
  #Make variable showing corn or soy acres
  wrg_cp_lepa <- wrg_cp_lepa %>%
    dplyr::mutate(corn_or_soy_acres = corn_acres + soy_acres,
                perc_soy = soy_acres/acres_irr, 
                perc_corn = corn_acres/acres_irr,
                perc_wheat = wheat_acres/acres_irr,
                perc_sorghum = sorghum_acres/acres_irr,
                perc_alfalfa = alfalfa_acres/acres_irr)
  
  #Create a new dataframe to store the results in 
  df_template <- data.frame(matrix(ncol=6,nrow=0, 
                                   dimnames=list(NULL, c("dep_var", "effect_estimated",
                                                         "estimate", "se_estimate", 
                                                         "c_crit_val", "panel_mean_dep_var"))))
  #Make one version for the ATT's
  att_df <- df_template
  #Make another for the cohort effects
  grp_df <- df_template
  #Next version for the event study coefficients
  dyn_df <- df_template
  
  #List the dependent variables
  effect_order <- c("perc_soy", "perc_corn", "perc_wheat", "perc_sorghum", "perc_alfalfa")
  
  for (i in effect_order) {
    dep_var <- i
    #Make a df of just the dep var so we can get its mean value
    df_dep_var <- wrg_cp_lepa %>% dplyr::select(all_of(dep_var))
    mean_dep_var <- mean(df_dep_var[,1], na.rm = TRUE)
    #Run att_gt Callaway and Sant'Anna using Not Yet Treated as Control Group
    sharp_cplepa_attgt_nyt <- att_gt(yname = dep_var,
                                          tname = "wua_year",
                                          idname = "WR_GROUP",
                                          gname = "cohort_cp_lepa",
                                          data = wrg_cp_lepa,
                                          panel = TRUE,
                                          allow_unbalanced_panel = TRUE,
                                          # xformla = NULL,
                                          xformla=~hyd_cond+predev_sat+sandtotal+silttotal+init_af_used,
                                          est_method = "dr",
                                          control_group = c("notyettreated"),
                                          anticipation=0,
                                          clustervars = "WR_GROUP", 
                                          biters=1000
    )
    #Save the raw, individual ATT_GT estimates
    cband_text1a <- paste0(100*(1-sharp_cplepa_attgt_nyt$alp),"% ")
    cband_text1b <- ifelse(sharp_cplepa_attgt_nyt$DIDparams$bstrap,
                           ifelse(sharp_cplepa_attgt_nyt$DIDparams$cband, "Simult. ", "Pointwise "),
                           "Pointwise ")
    cband_text1 <- paste0("[", cband_text1a, cband_text1b)
    cband_lower <- sharp_cplepa_attgt_nyt$att - sharp_cplepa_attgt_nyt$c*sharp_cplepa_attgt_nyt$se
    cband_upper <- sharp_cplepa_attgt_nyt$att + sharp_cplepa_attgt_nyt$c*sharp_cplepa_attgt_nyt$se
    
    sig <- (cband_upper < 0) | (cband_lower > 0)
    sig[is.na(sig)] <- FALSE
    sig_text <- ifelse(sig, "*", "")
    
    out <- cbind.data.frame(sharp_cplepa_attgt_nyt$group, sharp_cplepa_attgt_nyt$t, sharp_cplepa_attgt_nyt$att, sharp_cplepa_attgt_nyt$se, cband_lower, cband_upper)
    out <- cbind.data.frame(out, sig_text)
    colnames(out) <- c("Group", "Time", "ATT(g,t)","Std. Error", cband_text1, "Conf. Band]", "Significant")
    saveRDS(out, file = paste0("C:/Users/Micah/Dropbox/Irrigation technology transition/2nd round of revisions/final code/r_scripts/output/tables/all_attgt_cptolepa_", i, ".rds"))
    
    #Group specific effects to get overall ATT
    agg.gs <- aggte(sharp_cplepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = 0, max_e = Inf)
    #Store ATT  
    att_df[nrow(att_df)+1,] <- c(dep_var, "group_agg_effect_average", agg.gs["overall.att"], agg.gs["overall.se"], agg.gs["crit.val.egt"], mean_dep_var) 
    #Store cohort effects
    agg.gs <- aggte(sharp_cplepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = -Inf, max_e = Inf)
    group_order <- c(agg.gs[["egt"]])
    g_est_lst <- c(agg.gs[["att.egt"]])
    g_se_lst <- c(agg.gs[["se.egt"]])
    g_c <- agg.gs$crit.val.egt
    for (i in 1:length(group_order)) {
      grp_df[nrow(grp_df)+1,] <- c(dep_var, group_order[i], g_est_lst[i], g_se_lst[i], g_c, mean_dep_var)
    }
    
    #Dynamic effects
    agg.es <- aggte(sharp_cplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = 0, max_e = Inf)
    #Store the att  
    att_df[nrow(att_df)+1,] <- c(dep_var, "event_agg_effect_average", agg.es["overall.att"], agg.es["overall.se"], agg.es["crit.val.egt"], mean_dep_var) 
    #Store the event study coefficients
    agg.es <- aggte(sharp_cplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = -Inf, max_e = Inf)
    effect_order <- c(agg.es[["egt"]])
    est_lst <- c(agg.es[["att.egt"]])
    se_lst <- c(agg.es[["se.egt"]])
    e_c <- agg.es$crit.val.egt
    for (i in 1:length(effect_order)) {
      dyn_df[nrow(dyn_df)+1,] <- c(dep_var, effect_order[i], est_lst[i], se_lst[i], e_c, mean_dep_var)
    }
  }
  #Create the 95% ci variables
  att_df <- att_df %>%  mutate(ci_95_lb = estimate - c_crit_val*se_estimate,
                               ci_95_ub = estimate + c_crit_val*se_estimate)
  dyn_df <- dyn_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                               ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
  grp_df <- grp_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                               ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
  #Save as excel workbook
  dataset_names <- list('cs_att' = att_df, 'cs_event' = dyn_df, 'cs_group' = grp_df)
  write.xlsx(dataset_names, file.path(dr_temp, "/cs_cptolepa_perccrops.xlsx"))

  #Remove excess objects
  rm(wrg_cp_lepa, att_df, grp_df, dyn_df, sharp_cplepa_attgt_nyt)
  gc()
 
############### Table C.5 ###########################
  #Merge the ATT estimates for the two transitions into one spreadsheet.
  flood_cplepa_crop_att <- read.xlsx(paste0(dr_temp, "/cs_floodtocplepa_perccrops.xlsx"), sheet = "cs_att") %>%
    dplyr::filter(effect_estimated == "group_agg_effect_average") %>%
    dplyr::mutate(transition = "flood to center pivot or LEPA")
  cp_lepa_crop_att <- read.xlsx(paste0(dr_temp, "/cs_cptolepa_perccrops.xlsx"), sheet = "cs_att") %>%
    dplyr::filter(effect_estimated == "group_agg_effect_average") %>%
    dplyr::mutate(transition = "center pivot to LEPA")
  output_df <- rbind(flood_cplepa_crop_att, cp_lepa_crop_att) 
  #save
  write.csv(output_df, file = file.path(dr_output_app, "/tableC5.csv"))
  